do "E:/ReplicateBuild/02_code/00_environment/00_set_environment.do"


* 1: specify set of variables for prediction
*    (each characteristic is listed next to its <name>Missing indicator,
*     both of which are constructed in step 2 below)

global predvars "praxis praxisMissing grad_degree grad_degreeMissing certified certifiedMissing licensed licensedMissing"



* 2: put together dataset with all teachers, these variables, and mean residuals

use "$basedata/va_estimates_drift", clear

* Keep the mean of alphaZ_ma at each e_ma value 0,...,5 in globals
* alphaZ0,...,alphaZ5; they are added back to the predictions further down.
forvalues lvl = 0/5 {
	summarize alphaZ_ma if e_ma == `lvl'
	global alphaZ`lvl' = r(mean)
}

	* --- Teacher test scores: one row per teacher, mean standardized score ---
	use "$basedata/ncerdc_teacher_tests", clear
	collapse (mean) teacher_test_score_std, by(ncerdc_id)
	rename teacher_test_score_std praxis
	tempfile temptest
	save `temptest', replace

	* --- Education: 1 if any record has educational_attainment >= 5 ---
	use "$basedata/ncerdc_education", clear
	* Stata treats missing as larger than any number, so an unguarded
	* (educational_attainment>=5) would code missing attainment as 1.
	* Leave those rows missing instead; collapse (max) ignores them, and a
	* teacher with no non-missing record ends up flagged through
	* grad_degreeMissing after the merge further down.
	* NOTE(review): assumes missing attainment means "unknown" - confirm.
	generate grad_degree = (educational_attainment >= 5) if educational_attainment < .
	collapse (max) grad_degree, by(ncerdc_id)
	tempfile tempeducation
	save `tempeducation', replace


	* --- Certification: teacher id and firstyr_cert as stored in the file ---
	use "$basedata/ncerdc_teacher_certification", clear
	keep ncerdc_id firstyr_cert
	tempfile tempcert
	save `tempcert', replace

	* --- License: earliest std_license_fy per teacher, stored as firstyr_lic ---
	use "$basedata/ncerdc_teacher_license", clear
	rename std_license_fy firstyr_lic
	collapse (min) firstyr_lic, by(ncerdc_id)
	keep ncerdc_id firstyr_lic
	tempfile templic
	save `templic', replace

* Teacher-by-year file for the selected LEA. app_year is defined as sy - 1 and
* is the year key used for the merge onto the estimation data.
use "$basedata/allLEA_teacher_year_data", clear
keep if ncerdc_lea=="XXX" // hiding identity
keep ncerdc_id sy Abar_m1_ma Abar_m2_ma
generate app_year = sy - 1
sort ncerdc_id app_year

* For every attribute below: keep all teacher-year rows (unmatched rows from
* the using file are discarded), flag rows without information in a
* <name>Missing indicator, and set the attribute itself to 0 on those rows.

* Praxis score
merge n:1 ncerdc_id using `temptest', keep(master match) nogenerate
generate praxisMissing = praxis==.
replace praxis = 0 if praxis==.

* Graduate degree
merge n:1 ncerdc_id using `tempeducation', keep(master match) nogenerate
generate grad_degreeMissing = grad_degree==.
replace grad_degree = 0 if grad_degree==.

* Certification: 1 when firstyr_cert is no later than sy
merge n:1 ncerdc_id using `tempcert', keep(master match) nogenerate
generate certified = (firstyr_cert<=sy) if firstyr_cert!=.
generate certifiedMissing = certified==.
replace certified = 0 if certified==.

* License: 1 when firstyr_lic is no later than sy
merge n:1 ncerdc_id using `templic', keep(master match) nogenerate
generate licensed = (firstyr_lic<=sy) if firstyr_lic!=.
generate licensedMissing = licensed==.
replace licensed = 0 if licensed==.

* sy is not needed once the year-dependent indicators exist
drop sy

tempfile tempVA
save `tempVA', replace


* Estimation data: attach the teacher-year predictors built above. The merge
* indicator is stored as merge_VA.
use "$basedata/estimationdata/All_T_J/All_T_J_teachvar_buff_0_0_lf", clear
sort ncerdc_id app_year
merge n:1 ncerdc_id app_year using `tempVA', update generate(merge_VA)

* Rows where a <name>Missing indicator is still missing after the merge:
* set the characteristic to 0 and switch the indicator on.
foreach trait in praxis grad_degree certified licensed {
	replace `trait' = 0 if `trait'Missing==.
	replace `trait'Missing = 1 if `trait'Missing==.
}

* Experience: tchr_exp, falling back to ncerdc_exp when tchr_exp is missing.
generate e_ma = tchr_exp
replace e_ma = ncerdc_exp if (e_ma==. & ncerdc_exp!=.)

* Imputation version of experience: missing is set to 0, values above 5 to 5.
generate e_impute = e_ma
replace e_impute = 0 if e_ma==.
replace e_impute = 5 if e_impute>5





* Variable labels used in the exported tables
label variable Abar_m1_ma         "VA Advantaged"
label variable Abar_m2_ma         "VA Disadvantaged"
label variable praxis             "Praxis"
label variable praxisMissing      "Praxis missing"
label variable grad_degree        "Graduate degree"
label variable grad_degreeMissing "Graduate degree missing"
label variable certified          "NBPTS certified"
label variable certifiedMissing   "NBPTS certified missing"
label variable licensed           "Regular license"
label variable licensedMissing    "Regular license missing"


* hasVA is 1 only when both mu_jt_*_hat_preY_ma measures are non-missing
generate hasVA = !(mu_jt_m1_hat_preY_ma==. | mu_jt_m2_hat_preY_ma==.)
* CA is Abar_m2_ma minus Abar_m1_ma
generate CA = Abar_m2_ma - Abar_m1_ma

* Regress each outcome on the predictors among rows with hasVA==0, keep the
* linear prediction as VA1_nohasy/VA2_nohasy/VA3_nohasy and store the
* estimates as reg1/reg2/reg3 for the table exported further down.
local k = 0
foreach depvar in Abar_m1_ma Abar_m2_ma CA {
	local ++k
	regress `depvar' $predvars if hasVA==0
	predict VA`k'_nohasy, xb
	local r2 = e(r2)
	estadd ysumm
	eststo reg`k'
}

* Imputed values: linear prediction plus the alphaZ mean stored for the row's
* e_impute value (0 through 5).
generate VA1_imputed_0samp = .
generate VA2_imputed_0samp = .
forvalues lvl = 0/5 {
	foreach k in 1 2 {
		replace VA`k'_imputed_0samp = VA`k'_nohasy + ${alphaZ`lvl'} if e_impute==`lvl'
	}
}


* Create <predictor>_hasVA = predictor * hasVA for every predictor, and collect
* the names of all of them except licensed_hasVA in global predvarsintnoL.
global predvarsintnoL ""
foreach var in $predvars {
	generate `var'_hasVA = `var' * hasVA
	if "`var'"=="licensed" {
		continue
	}
	global predvarsintnoL $predvarsintnoL `var'_hasVA
}

* Pooled regressions of each outcome on the predictors, their hasVA
* interactions (all except licensed_hasVA) and hasVA itself.
*
* For regression n = 1 (Abar_m1_ma), 2 (Abar_m2_ma), 3 (CA) this stores:
*   samp<n>           estimation-sample indicator
*   VA<n>_int         linear prediction
*   bcons_<n>         intercept when hasVA==0
*   bconsint_<n>      intercept when hasVA==1 (_cons + hasVA)
*   b<var>_<n>        slope on <var> when hasVA==0
*   b<var>int_<n>     slope on <var> when hasVA==1
*
* The hasVA==1 slope is the base coefficient plus the coefficient on
* <var>_hasVA whenever that interaction is in the model. Previously this was
* hand-coded per regression and was inconsistent: regression 1 added the
* certifiedMissing interaction but not the certified one, regressions 2 and 3
* did the opposite, and the grad_degreeMissing / licensedMissing interactions
* were never added. An interaction that regress omits for collinearity has
* _b[] equal to 0, so adding it is harmless.
local intvars $predvarsintnoL
local n = 0
foreach depvar in Abar_m1_ma Abar_m2_ma CA {
	local ++n

	reg `depvar' $predvars $predvarsintnoL hasVA
	gen samp`n' = e(sample)
	predict VA`n'_int, xb

	global bcons_`n' = _b[_cons]
	global bconsint_`n' = _b[hasVA] + _b[_cons]

	foreach var of varlist $predvars {
		global b`var'_`n' = _b[`var']

		* position of the interaction in the list of included interactions;
		* 0 means the model has no interaction for this predictor
		local hasint : list posof "`var'_hasVA" in intvars
		if `hasint' {
			global b`var'int_`n' = _b[`var'_hasVA] + _b[`var']
		}
		else {
			global b`var'int_`n' = _b[`var']
		}
	}
}

summ VA*_int if hasVA==0 & Abar_m1_ma!=. & Abar_m2_ma!=.
summ VA*_int if hasVA==1 & Abar_m1_ma!=. & Abar_m2_ma!=.

** Appendix Table 32
* LaTeX export of the three hasVA==0 regressions stored as reg1-reg3:
* standard errors, variable labels, no significance stars, and the mean of
* the dependent variable, R-squared and N in the footer.
esttab reg1 reg2 reg3 using "$tables/imputed_va_reg_0samp.tex", ///
	replace booktabs label se ///
	nostar nonote nonumbers ///
	substitute(\_ _) ///
	keep($predvars _cons) ///
	stats(ymean r2 N, labels("Mean DV" "R squared" "N"))
	
* Decomposition tables, one per pooled regression (n = 1, 2, 3).
* Each table row holds six numbers, in the order of the column titles:
*   coefficient when hasVA==0, coefficient when hasVA==1,
*   mean when hasVA==0, mean when hasVA==1,
*   d1 = coefficient(hasVA==0) * (mean(hasVA==0) - mean(hasVA==1)),
*   d2 = mean(hasVA==1) * (coefficient(hasVA==0) - coefficient(hasVA==1)).
* The last row reports the column sums of d1 and d2.
forv n=1/3 {

	* Constant row: its "mean" is 1 in both groups, so d1 is zero
	global runningtotal1 = 0
	global runningtotal2 = 0
	qui global mcons1 = 1
	qui global mcons2 = 1
	qui global d1cons = ${bcons_`n'} * (${mcons1}-${mcons2})
	qui global d2cons = ${mcons2} * (${bcons_`n'}-${bconsint_`n'})

	global runningtotal1 = ${runningtotal1} + ${d1cons}
	global runningtotal2 = ${runningtotal2} + ${d2cons}

	* Predictor rows: group means are taken within the estimation sample
	foreach var of varlist $predvars {
		qui summ `var' if hasVA==0 & (samp`n'==1)
		qui global m`var'1 = r(mean)
		qui summ `var' if hasVA==1 & (samp`n'==1)
		qui global m`var'2 = r(mean)

		qui global d1`var' = ${b`var'_`n'} * (${m`var'1}-${m`var'2})
		qui global d2`var' = ${m`var'2} * (${b`var'_`n'}-${b`var'int_`n'})

		global runningtotal1 = ${runningtotal1} + ${d1`var'}
		global runningtotal2 = ${runningtotal2} + ${d2`var'}
	}

	* Build one 1x6 matrix per row. The placeholder variables a1var, a2var, ...
	* exist only to carry the row labels that frmttable prints.
	* The constant row uses the intercepts of the current regression; it was
	* previously hard-coded to those of regression 1 for all three tables.
	mat a1 = (${bcons_`n'},${bconsint_`n'},$mcons1,$mcons2,$d1cons,$d2cons)
	cap drop a1var
	gen a1var = .
	local i = 1
	foreach var of varlist $predvars {
		local i = `i'+1
		mat a`i' = (${b`var'_`n'},${b`var'int_`n'},${m`var'1},${m`var'2},${d1`var'},${d2`var'})
		cap drop a`i'var
		gen a`i'var = .
	}
	mat alast = (.,.,.,.,${runningtotal1},${runningtotal2})
	cap drop alastvar
	gen alastvar = .

	label var a1var "Constant"
	label var a2var "Praxis"
	label var a3var "Praxis missing"
	label var a4var "Graduate degree"
	label var a5var "Graduate degree missing"
	label var a6var "NBPTS certified"
	label var a7var "NBPTS certified missing"
	label var a8var "Regular license"
	label var a9var "Regular license missing"
	label var alastvar "Total"

	forv i=1/9 {
		mat rownames a`i' = a`i'var
	}
	mat rownames alast = alastvar

	mat a = a1\a2\a3\a4\a5\a6\a7\a8\a9\alast

	** Appendix Tables A33, A34, A35

	frmttable using "$tables/va`n'_impute_decomp_0samp", statmat(a) replace va tex fra ///
		ctitles("","$\beta$ VA New","$\beta$ VA Experienced","Mean New","Mean Experienced","Diff. from X","Diff. from $\beta$") ///
		sdec(2,2,2,2,2,2\2,2,2,2,2,2\2,2,2,2,2,2\2,2,2,2,2,2\2,2,2,2,2,2\2,2,2,2,2,2\2,2,2,2,2,2\2,2,2,2,2,2\2,2,2,2,2,2\2,2,2,2,2,2) 

}	



* Save the imputed VA measures for the estimation step. The data in memory
* are restored afterwards.
preserve
* The pattern is VA*_impute* (the same one the keep below uses) so that it
* matches the VA1_imputed_0samp / VA2_imputed_0samp variables created in this
* file. The narrower VA*_impute matches none of them and stops the do-file if
* the loaded data has no variable with that exact suffix.
summ VA*_impute*, detail
* drop rows that exist only in the teacher-year VA file
drop if merge_VA==2

* 6: merge these on to dataset for estimation

keep itidx VA*_impute* Abar* mu_jt_m1_hat_preY_ma mu_jt_m2_hat_preY_ma mu_jt_m1_hat_career_ma_prior mu_jt_m2_hat_career_ma_prior ncerdc_exp 


sort itidx
save "$temp/VAimputed", replace
restore



